import requests
from lxml import etree
import time
import random


class MaoyanXpathSpider:
    """Scrape movie entries from the paginated Maoyan board pages.

    Each page is downloaded with ``requests``, parsed with ``lxml`` and every
    ``<dd>`` entry under ``<dl class="board-wrapper">`` is turned into a dict
    with the keys ``name``, ``star`` and ``time``, which is printed.
    """

    def __init__(self):
        # The ``{}`` placeholder is filled with the paging offset in ``run``.
        self.url = 'https://www.maoyan.com/board/4?offset={}'
        self.headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) '
                'AppleWebKit/534.50 (KHTML, like Gecko) '
                'Version/5.1 Safari/534.50'
            ),
        }

    @staticmethod
    def _first_text(nodes):
        """Return the first XPath result stripped, or None if nothing matched."""
        return nodes[0].strip() if nodes else None

    def get_html(self, url, timeout=10):
        """Download one page and hand its HTML to ``parse_html``.

        Args:
            url: Fully formatted page URL.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error, so a stalled connection cannot hang
                the crawl forever.

        Returns:
            The list of movie dicts produced by ``parse_html``.
        """
        html = requests.get(
            url=url, headers=self.headers, timeout=timeout
        ).text
        return self.parse_html(html)

    def parse_html(self, html):
        """Extract and print the movie entries contained in ``html``.

        Args:
            html: Page source as text.

        Returns:
            A list with one dict per ``<dd>`` entry. A field whose node is
            missing from the entry is stored as ``None`` instead of raising.
            An empty or unparsable document yields an empty list.
        """
        # Nothing to parse; bail out before handing an empty document to lxml.
        if not html or not html.strip():
            return []

        tree = etree.HTML(html)
        if tree is None:
            return []

        items = []
        # Base XPath: one <dd> node per movie.
        for dd in tree.xpath('//dl[@class="board-wrapper"]/dd'):
            # A fresh dict per movie so collected items do not alias each other.
            item = {
                'name': self._first_text(
                    dd.xpath('.//p[@class="name"]/a/@title')),
                'star': self._first_text(
                    dd.xpath('.//p[@class="star"]/text()')),
                'time': self._first_text(
                    dd.xpath('.//p[@class="releasetime"]/text()')),
            }
            print(item)
            items.append(item)
        return items

    def run(self, start=0, stop=90, step=10):
        """Crawl the pages whose offsets are ``range(start, stop, step)``.

        The defaults visit offsets 0, 10, ..., 80 (nine pages).
        NOTE(review): if the board has further pages, pass a larger ``stop``.

        Args:
            start: First offset to request.
            stop: Exclusive upper bound for the offset.
            step: Offset increment between pages.
        """
        for offset in range(start, stop, step):
            url = self.url.format(offset)
            print(url)
            self.get_html(url)
            # Random 1-2 second pause between consecutive requests.
            time.sleep(random.randint(1, 2))


# Script entry point: build the spider and crawl every configured page.
if __name__ == '__main__':
    MaoyanXpathSpider().run()
